rm(list=ls())
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ade4)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(readxl)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(igraph)
##
## Attaching package: 'igraph'
##
## The following object is masked from 'package:plotly':
##
## groups
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
## The following objects are masked from 'package:purrr':
##
## compose, simplify
##
## The following object is masked from 'package:tidyr':
##
## crossing
##
## The following object is masked from 'package:tibble':
##
## as_data_frame
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
library(viridis)
## Loading required package: viridisLite
library(ggpubr)
library(ggplot2)
library(ggrepel)
library(devtools)
## Loading required package: usethis
library(processx)
We can’t precisely compare datasets from Keren and Wagner paper because they don’t use the same labels for cell types. Hence, we will build a reference space containing the cell types we selected. We use some tools from information theory in order to do it: bipartite graphs. Each side would represent either Keren or Wagner data, the vertices would represent the cell types in different datasets. While edges would represent the link or the mapping, i.e we would link Keratin positive tumors and tumors cell types to Epithelial Cells in Wagner dataset because we know that in this paper they used markers of cancer cells and cytokeratins (panKeratin, K17, K5, etc…) to identify epithelial cells.
dirName <- dirname(rstudioapi::getSourceEditorContext()$path )
setwd(dirName)#setwd(dir = "/scratch/anissa.el/ImmuneStates/wagner_analysis")
source ("./scripts/R/Wagner_Keren_functions.r")
CellsPropBC2 <- readRDS("./data/scBCpreDiss.rds")
CellsProps.postDiss2 <- readRDS('./outputs/scBCpostDiss.rds')
To solve this problem, we will use the incidence matrix, let’s call It G. This matrix (of 0 or 1) represents the links between some objects (cell types across 2 datasets here). This will be the base for our set of reference cell types.
## New names:
## • `` -> `...1`
## png
## 2
Then, we are going to create two Matrices that will allow us to convert proportions of cell types from a dataset to our reference set, let’s call them respectively Gk and Gw for Keren and Wagner datasets.
G –> Gk : Sum up all the columns, if one is >1 , take the first row that is = 1 in this column, give It the same name as column and remove the other rows that were equal to 1 in this column.
G –> Gw : Sum up all the rows, if one is >1 , take the first column that is = 1 in this row, give It the same name as row and remove the other columns that were equal to 1 in this row.
Then, we will produce two matrices Yk and Yw that represent the new datasets that represent the proportions of cell types from the reference across samples for respectively Keren and Wagner dataset.
Thus Yk = Gk.XkT (Xk : matrix of cells proportions from Keren dataset) and YwT = Gw. XwT
Let’s compute the matrices Gk and Gw that will be used to do transformation :
GMatrix <- as.matrix(GMatrix)
Gk <- compute_Gk_matrix(GMatrix)
Gw <- compute_Gw_matrix(GMatrix)
# We will filter the tumors that contain too much Healthy tissue, we put the threshold at 50 %
#CellsPropBC2.filt <- CellsPropBC2%>%filter(Other<=50)
Yw <- t(Gw%*%t(as.matrix(CellsPropBC2)))
Yw2 <- t(Gw%*%t(as.matrix(CellsProps.postDiss2))) #t(Gw%*%t(as.matrix(newCellsProps)))
rownames(Yw) <- rownames(CellsPropBC2)
rownames(Yw2) <-rownames(CellsProps.postDiss2) #rownames(newCellsProps)
#colnames(Yw)[9] <- "EndothelialCells"
## FIXME: Yw and Ybb should have exactly the same cell labels !!
colnames(Yw2) <- str_replace_all(colnames(Yw2),c(Endothelial="EndothelialCells"))
colnames(Yw) <- str_replace_all(colnames(Yw),c(Endothelial="EndothelialCells"))
#rowSums(Yw)
We fetch data from building blocks analysis in order to transform the cell abundances in the new cell reference space ==> 10 cell types.
## Joining, by = c("sample_id", "EndothelialCells", "EpithelialCells",
## "Mesenchymal-like", "NK", "B", "DC", "CD4-T", "CD8-T", "Macrophages",
## "OtherImmune", "group")
## Joining, by = c("sample_id", "EndothelialCells", "EpithelialCells",
## "Mesenchymal-like", "NK", "B", "DC", "CD4-T", "CD8-T", "Macrophages",
## "OtherImmune", "group")
## Joining, by = c("sample_id", "EndothelialCells", "EpithelialCells",
## "Mesenchymal-like", "NK", "B", "DC", "CD4-T", "CD8-T", "Macrophages",
## "OtherImmune", "group")
Let’s compare proportions of cells (in our new cells-space reference) across the datasets :
CellsPropsk <- gather(as_tibble(Yk),key = "cellType",value = "proportion")%>%
mutate(dataset = "Keren")
CellsPropsw <- gather(as_tibble(Yw),key = "cellType",value = "proportion")%>%
mutate(dataset = "Wagner")
CellsPropswk <- bind_rows(CellsPropsk,CellsPropsw)
plot1 <- ggplot(data = CellsPropswk,
aes(x = dataset,y = proportion,color = dataset,group = dataset))+
geom_point()+
facet_wrap(~cellType)+
theme(axis.text.x = element_text(angle = 45,hjust = 0.8,vjust = 0.5))+
labs(title = "Boxplot of proportions of cell types from tumors from Keren & Wagner datasets")+
xlab ("") + ylab("proportion")
plot1
The distributions of cells proportions seem to be homogeneous across the datasets after the transformation in the new cells reference space, except for Mesenchymal-like cells. In the samples taken from Wagner data, there is a mix between tumor and healthy tissue(cells included in the group Mesenchymal-like). In general, all the cell types seem to be comparable, so the cells reference space that has been computed allows us compare data from two different datasets such as Wagner and Keren.
#Wagner macro 2 is the new set of proportions obtained after dissection on the last
Yw_ref<- Y%>%filter(group=="wagner_macro2")%>%column_to_rownames(var="sample_id")%>%dplyr::select(-(group))
scBCPCA.pure <- dudi.pca(Yw_ref,scale = FALSE, center = TRUE, scannf = F, nf=3)#dudi.pca(Yw2, scale = FALSE, center = TRUE, scannf = F, nf=3)
scBCPCA.pure$eig
## [1] 863.79353133 95.32302013 44.86482802 26.32102306 24.31962142
## [6] 15.94159702 12.13962976 1.47584860 0.07339742
fviz_eig(scBCPCA.pure)
fviz_pca_biplot(scBCPCA.pure,col.var = 'indianred2',col.ind = 'steelblue2', repel = TRUE,max.overlaps = 3)
## Warning: ggrepel: 127 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
fviz_pca_biplot(scBCPCA.pure,axes = c(2,3),col.var = 'indianred2',col.ind = 'steelblue2', repel = TRUE,max.overlaps = 3)
## Warning: ggrepel: 122 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
fviz_pca_biplot(scBCPCA.pure,axes = c(1,3),col.var = 'indianred2',col.ind = 'steelblue2', repel = TRUE,max.overlaps = 3)
## Warning: ggrepel: 126 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#Yk.weigths <- t(decenteredWeigths.prop)%*% Gk
CellsPropsk <- gather(as_tibble(Yk),key = "cellType",value = "proportion")%>%
mutate(dataset = "Keren")
CellsPropsw2 <- gather(as_tibble(Yw2),key = "cellType",value = "proportion")%>%
mutate(dataset = "Wagner")
CellsPropswk2 <- bind_rows(CellsPropsk,CellsPropsw2)
ggplot(data = CellsPropswk2,aes(x = dataset,y = proportion,color = dataset,group = dataset))+
geom_violin()+
facet_wrap(~cellType)+
theme(axis.text.x = element_text(angle = 45,hjust = 0.8,vjust = 0.5))+
labs(title = "Violin plots of proportions of cell types from tumors from Keren & Wagner datasets")+
xlab ("") + ylab("proportion")
fig9 <- plot_ly(x = scBCPCA.pure$li$Axis1,
y = scBCPCA.pure$li$Axis2,
z = scBCPCA.pure$li$Axis3,
#color=~Yw_ref[,"EpithelialCells"],
showlegend = TRUE,
name = "wagner tumors",
type = "scatter3d", mode = "markers",
marker = list(symbol = "triangle",size = 4),
mode = "text")
PC1 <- as.matrix(scBCPCA.pure$c1$CS1,ncol=1)
PC2 <- as.matrix(scBCPCA.pure$c1$CS2,ncol=1)
PC3 <- as.matrix(scBCPCA.pure$c1$CS3,ncol=1)
Ybb_proj1 <- (scale(Ybb,center = scBCPCA.pure$cent,scale = FALSE)%*% PC1)
Ybb_proj2 <- (scale(Ybb,center = scBCPCA.pure$cent,scale = FALSE)%*% PC2)
Ybb_proj3 <- (scale(Ybb,center = scBCPCA.pure$cent,scale = FALSE)%*% PC3)
PC <- as.matrix(scBCPCA.pure$c1)
TMENS_proj <- (scale(Y%>%filter(group=="keren_micro")%>%column_to_rownames(var="sample_id")%>%dplyr::select(-(group)),
center = scBCPCA.pure$cent,scale = FALSE)%*%PC)
#Purecancer<- (scale(t(c(0,100,0,0,0,0,0,0,0,0)),center=scBCPCA.pure$cent,scale=FALSE)%*%PC)
#TMENS_proj2 <- (scale((t(decenteredWeigths.prop)%*% Gk),center =scBCPCA.pure$cent ,scale = FALSE))%*%PC
fig9 <- fig9 %>% add_trace(x = TMENS_proj[,1],#as.vector(Ybb_proj1),#Purecancer[,1],
y = TMENS_proj[,2],#as.vector(Ybb_proj2),#Purecancer[,2],
z =TMENS_proj[,3],#as.vector(Ybb_proj3),#Purecancer[,3],
type = "scatter3d",
mode = "markers",
text = rownames((Ybb)),#"pure cancer",
showlegend = TRUE,
name = "TMENs",
marker = list(color=~c("#D463DF" ,"#FF0000","#1E90FF","#000000"),symbol = "star-diamond",size = 12),
inherit = FALSE)
fig9 <- fig9 %>% layout(scene = list(xaxis = list(title = "PC1"),
yaxis = list(title = "PC2"),
zaxis = list(title = "PC3")),
title = "PCA on reference cell proportions space of BC tumors")
fig9
rm(PC)
In 2 dimensions, we have this plot:
TMENS_3PC <- as_tibble(TMENS_proj,rownames=NA)%>%rownames_to_column(var="TMEN")%>%mutate(TMEN =str_replace_all(TMEN,c(BB1 = "TLS",BB2 = "Inflammatory",BB3 = "Cancer",BB4 = "Fibrotic")))
ggplot(data = TMENS_3PC)+
geom_point(aes(x=CS1,y=CS2,color=TMEN),size = 6)+
scale_color_manual("TMEN",values = c("TLS" ="#D463DF" ,#pink
"Inflammatory" = "#FF0000", #red
"Cancer" = "#1E90FF", #dodgerblue
"Fibrotic" = "#000000"))+#black
xlab("PC1") + ylab("PC2")+
geom_point(data = as_tibble(scBCPCA.pure$li),aes(x= Axis1,y = Axis2),color ="#4682B4",group="tumors" ) #steelblue "#4682B4", lightseagreen "#20B2AA"
ggsave("./figs/TMENs_macro.pdf", height = 3, width = 5)
library(reticulate)
nbShuffles <- 10
nPC <- length(colnames(Yw_ref)) -1
#EigenVals.rand <- matrix(nrow = nbShuffles,ncol = nPC)
set.seed(25) # for reproducibility
randomData <- data.frame(shuffle_idx=integer(),archetype=character(),PC1 = double(), PC2 = double())
# data1 = sys.argv[1]
# evs = sys.argv[2]
# means = sys.argv[3]
# nbArchs = sys.argv[4]
# outputPath1 = sys.argv[5]
nbArch <-3
for (i in 1:nbShuffles){
CellsPropBC.rand <- apply(Yw_ref,MARGIN = 2,function(x) sample(x,size = length(x),replace = FALSE)) ## Column-wise operation ==> MARGIN=2 #apply(CellsPropBC2,MARGIN = 2,function(x) fyshuffle(x))
sumsSamples <- apply(CellsPropBC.rand,MARGIN = 1,function(x) sum(x))
CellsPropBC.rand2 <- (CellsPropBC.rand/sumsSamples)*100
pcaData <- dudi.pca(CellsPropBC.rand2,scale = FALSE,center = TRUE,nf = 2, scannf = FALSE)
randData <- as.matrix(pcaData$li[,1:2])
eigVects <- as.matrix(pcaData$c1[,1:2])
means <- as.matrix(pcaData$cent)
#system("python3 /scratch/anissa.el/ImmuneStates/wagner_analysis/scripts/python/archetype_analysis.py 'randData' 'eigVects' 'means' 3 '/scratch/anissa.el/ImmuneStates/wagner_analysis/outputs/' ",wait = FALSE)
#py_run_file("/scratch/anissa.el/ImmuneStates/wagner_analysis/scripts/python/archetype_analysis.py", local = FALSE, convert = TRUE)
py_run_string("import numpy as np")
py_run_string("import sys")
py_run_string("sys.path.insert(1, '../TMENS_analysis/src/utils')")
py_run_string("from archetypes import ArchetypalAnalysis")
py_run_string("import pandas as pd")
#py_run_string("pc3dWagner =np.array(randData, dtype = 'float64')")
py_run_string("def compute_archetypes(randData,nbArch,eigVects,means):
pc3dWagner = randData
AA = ArchetypalAnalysis(n_archetypes = nbArch,
tolerance = 0.001,
max_iter = 200,
random_state = 0,
C = 0.0001,
initialize = 'random',
redundancy_try = 30)
AA.fit_transform(pc3dWagner)
archsCellsSpace = np.dot(AA.archetypes.T,eigVects.T) + means.T
return archsCellsSpace.T")
#py_run_string("AA.fit_transform(pc3dWagner)")
#py_run_string("archsCellsSpace = np.dot(AA.archetypes.T,eigVects) + np.array(means,dtype='float64')")
ArchsCoords1 <- (py$compute_archetypes(randData,as.integer(nbArch),eigVects,means))
#print(length(eigs))
#EigenVals.rand[i,]<- cumsum(eigs/sum(eigs)*100)
ArchsProj <- scale(t(ArchsCoords1),scale=FALSE,center=scBCPCA.pure$cent)%*%as.matrix(scBCPCA.pure$c1[,1:2])
#Archs <- read.csv("/scratch/anissa.el/ImmuneStates/wagner_analysis/outputs/shuffled_data_archetypes.csv")
#shuffledData <- data.frame(cell_type = colnames(CellsPropBC2),shuffle_idx = rep(i,length(colnames(CellsPropBC2))), mean_cells = means,PC1 = eigVects[,1], PC2 = eigVects[,2],arch1 = ArchsCoords1[,1] ,arch2 =ArchsCoords1[,2] ,arch3 =ArchsCoords1[,3],origPC1 = scBCPCA$c1[,1],origPC2 = scBCPCA$c1[,2], origMeans = scBCPCA$cent)
shuffledData <- data.frame(shuffle_idx = rep(i,nbArch),archetype = c("ARCH1","ARCH2","ARCH3"),PC1 = ArchsProj[,1],PC2=ArchsProj[,2])
randomData <- rbind(randomData,shuffledData)
}
### Projection onto the original PC space
#origpC <- scBCPCA$c1[,1:2]
#origMeans <- scBCPCA$cent
ggplot()+
geom_point(data=as_tibble(scBCPCA.pure$li,rownames=NA),aes(x=Axis1,y=Axis2),color="red")+
geom_point(data=randomData,aes(x=PC1,y=PC2,group=factor(shuffle_idx)),color="grey")+
geom_polygon(data=randomData,aes(x=PC1,y=PC2,group=factor(shuffle_idx)),color="grey", fill=NA)+
geom_point(data= TMENS_3PC,aes(x=CS1,y=CS2))+
geom_polygon(data=TMENS_3PC%>%filter(TMEN %in%c("Cancer","TLS","Fibrotic")),aes(x=CS1,y=CS2),color="black",fill=NA)
The cloud of points from Wagner tumors projected in this 3PC space seems to be driven by the 3 building blocks, but for some tumor samples there is a high amount of non-cancer/immune cells which seems to be healthy tissue that we should get rid of.
So the next steps consist in removing the “healthy” part of the sample. This will consist first, in getting the average cell abundance of healthy tissue in breast : It is mainly composed of adipocytes (the most abundant cell type), a little bit of fibroblasts and a very little (but still significant) proportion of tissue resident macrophages. We consider them as part of the healthy tissue because they were usually located in the tissue before the beginning of the cancer, they weren’t recruited by the immune system, their presence is not a consequence of tumor appearance.
We are going to discard the healthy part of the samples from Wagner dataset in a computational way, because this is the only way we can try to tweak the samples, based only on cell abundance. Let’s visualize It in the 3D space. If we go back to our PCA space (Keren dataset), and plot points as tumors from Wagner dataset. The cloud of points can be represented as a simplex (as seen before) which vertices represent a cell type or group of cell types that are the most abundant and hence shape the dataset. There is a vertex representing the healthy tissue contained inside the sample. We would do an orthogonal projection of the samples which amount of other cells are quite high on a plane defined by building blocks :
## null device
## 1
We will build a plane driven by 2 main vectors : one that represents the variation of proportions of cancer cells, let’s call It x_c and another now representing the variation of every infiltrated immune cell (x_i). We will project our tumors in this plane in order to eliminate in silico the parts in the tumors samples that are not from the tumor micro environment. We will also put a threshold from which we consider that the sample is not exploitable because of a high proportion of healthy tissue : if we have more than 50 % of mesenchymal-like cells, we discard the sample from our analysis (otherwise, If we keep It, we will have a wider error bar).
We would do an Archetypal Analysis in the PCA space of Wagner tumors :
## array([[-3.25145045e+00, -1.35289176e+01, 2.12129450e+00],
## [-4.36246594e+00, -1.88231387e+01, 2.14303655e+00],
## [-5.24127377e+01, 1.23040576e+00, 3.01862271e+00],
## [-2.50826004e+01, 6.48123129e+00, 1.94562306e+00],
## [ 4.76572846e+01, 3.98418912e+01, 3.48800046e+00],
## [ 2.94485030e+01, 2.44139894e+01, -1.12439111e+00],
## [ 2.63110588e+01, -7.63619079e+00, 8.25236834e+00],
## [-3.84987634e+01, 1.35651704e+01, 3.61879194e+00],
## [ 3.37020016e+01, 2.83993844e+01, 4.54272782e+00],
## [-5.46814574e+01, 3.49742098e+00, 4.00006612e+00],
## [ 2.30297493e+01, 1.29606232e+01, 2.03120261e+00],
## [-3.87858014e+01, 2.50134941e-02, 1.86668416e+00],
## [ 5.17411668e+01, 4.90429768e+01, 3.56181330e+00],
## [-2.72938470e+01, -9.72842355e+00, 4.50068592e+00],
## [-1.85620653e+01, 5.44063361e+00, 5.20464398e+00],
## [-1.83973227e+01, 1.57740931e+01, -4.95543900e-01],
## [-4.93736256e+01, 8.73898582e-01, 2.52087663e+00],
## [ 2.11070485e+01, -8.93837006e+00, -2.83258562e+01],
## [ 2.64797456e+01, -3.22277955e+01, 1.00004008e+01],
## [ 2.17471130e+01, 1.62028883e+01, 7.21583803e-01],
## [ 1.12880449e+01, -9.45276253e+00, -2.70935639e+01],
## [-3.08117383e+01, 3.91753085e+00, 2.31664309e+00],
## [-2.24466815e+01, -1.28996629e+01, 4.86577321e+00],
## [ 2.68530517e+01, 3.49778637e+01, 4.94167779e-01],
## [ 1.46612981e+01, -9.57286725e+00, 2.71308790e+00],
## [ 1.93155905e+01, 1.51093456e+01, -8.75540470e-01],
## [ 2.94030039e+01, 2.36247964e+01, 2.62431653e+00],
## [ 1.81788237e+01, 2.16599101e+01, 1.09897339e-01],
## [ 4.99961798e+01, 4.45619899e+01, 2.26916831e+00],
## [ 3.79821732e+01, 2.71763176e+00, 5.30582726e+00],
## [ 3.25898508e+01, -5.47474289e-01, 5.87249757e+00],
## [ 7.20849511e+00, -7.23363294e+00, 1.04356283e+00],
## [ 2.12060849e+00, -2.08731211e+01, 2.62566696e+00],
## [-1.91625632e+01, 7.80491089e+00, 4.94463205e+00],
## [ 2.86083343e+01, -8.24057860e+00, 5.49566932e+00],
## [ 3.30177535e+01, -5.23528694e+00, 1.07960919e+00],
## [ 5.99732079e+00, 2.36921186e+01, 9.49934496e-01],
## [-1.95931124e+00, -1.68905277e+01, 2.64695362e+00],
## [-1.36186820e+01, 2.05707828e+01, 3.88105890e+00],
## [ 6.52023081e+00, 3.12644139e+01, 3.59037101e+00],
## [ 2.45295644e+01, 2.30296091e+01, 3.73549128e+00],
## [ 3.33856986e+01, 3.83216602e+01, 3.15457187e+00],
## [ 2.93267459e+01, -2.88629377e+01, 7.20433297e+00],
## [ 3.82170718e+01, 4.20336416e+01, 3.64049126e+00],
## [ 2.68143476e+01, 2.85566775e+01, -4.12036197e+00],
## [-5.85429742e+01, 4.03876079e+00, 3.28939651e+00],
## [-1.94263637e+00, 1.62435942e+01, 1.30323380e+00],
## [ 9.28209130e-01, -2.14211758e+01, 2.95053754e+00],
## [ 2.69363476e+01, -1.33315715e+01, -1.56331850e+01],
## [ 3.60482188e+01, 1.14161133e+01, 2.75742526e+00],
## [ 3.91903277e+01, 2.79440394e+01, 2.71679874e+00],
## [ 1.51441119e+00, 1.97342140e+01, -7.09490902e-01],
## [ 7.48727541e-01, 1.63923664e+01, -1.38698728e+00],
## [-6.24910032e+01, 4.13264243e+00, 3.63394285e+00],
## [ 1.02267409e+01, -1.32597166e+00, 6.49810055e-01],
## [-7.11545915e+00, 2.05073174e+01, 3.87358516e-01],
## [-1.83843258e+01, 4.87328937e+00, 3.80318339e+00],
## [-4.42531301e+01, 6.32318836e+00, 3.22923285e+00],
## [ 2.07138969e+01, 2.13469656e+01, 3.29968012e+00],
## [-8.54107260e-01, -1.05808628e+01, 3.33018831e+00],
## [-6.68113428e+00, -7.82583788e+00, -1.64912251e+01],
## [ 1.81557723e+01, 2.56125591e+01, -7.99084820e-01],
## [-1.73295376e+00, 5.34708525e+00, 2.97928450e+00],
## [ 3.18465194e+01, -3.87121093e+01, 1.15786234e+01],
## [ 3.79899970e-01, 1.88126676e+01, 6.63402218e-01],
## [-2.11895129e+01, -4.62684525e+00, 1.52144318e+00],
## [ 3.67476939e+01, 3.95683303e+00, -3.45907434e+00],
## [-2.14628738e+01, 1.03421461e+01, 1.67463036e+00],
## [ 3.82883301e+01, -8.36461932e+00, 8.78927643e+00],
## [ 3.44385164e+01, 1.68758536e+01, 3.65819567e+00],
## [-3.22673850e+01, -4.49146375e+00, 8.82280793e-01],
## [-3.44041756e+01, -1.69459308e+00, 1.38266700e+00],
## [ 2.81263429e+01, 3.19825420e+01, 2.57561128e+00],
## [-1.31257808e+01, 1.30821999e+01, 2.64453518e+00],
## [ 3.23332452e+01, -3.06353533e+01, 7.70267408e+00],
## [ 3.72780397e+00, -1.88366660e+01, 7.66083467e+00],
## [ 1.24164671e+01, -1.56145938e+01, -3.66384476e+00],
## [ 2.62154742e+01, -3.18589658e+01, 9.04728928e+00],
## [ 2.45429136e+01, -3.53951098e+01, 1.09635609e+01],
## [-1.56225569e+01, 5.66816605e+00, -1.37950798e+00],
## [ 3.53715687e+00, -1.71420472e+01, 7.65969268e+00],
## [ 2.31095777e+00, -8.05538898e+00, 2.56664414e+00],
## [-1.53282550e+01, -8.82166837e+00, -2.36231624e+00],
## [-7.86634006e+00, -9.61817650e+00, -1.22694981e+01],
## [ 1.01000570e+01, -2.01404948e+01, 8.87653526e+00],
## [ 2.41825024e+01, -2.03393880e+01, 2.96689222e+00],
## [ 3.24639884e+01, -3.59886448e+01, 1.13298314e+01],
## [-7.64989471e+00, -1.11453841e+01, -9.60173366e+00],
## [-5.25252614e+01, 2.22258136e+00, 3.00078515e+00],
## [ 1.22571054e+01, -1.06882193e+01, 1.71501483e+00],
## [-2.64813858e+01, 5.33809743e+00, 2.43635052e-01],
## [ 1.32691496e+01, -3.00219406e+01, 9.54501791e+00],
## [-6.83981009e+00, -6.11477310e+00, -1.95436961e+01],
## [-1.48362153e+00, -1.12717019e+01, 1.37744522e+00],
## [-3.67100312e+01, -7.45487521e-01, 3.84081346e+00],
## [-5.66175337e+01, 5.00775664e+00, 3.77382865e+00],
## [ 3.63162725e+00, -1.12029696e+01, -2.07678726e+00],
## [ 2.78579801e+01, -1.77502830e+01, -7.54512405e+00],
## [ 1.09165109e+00, 1.68993317e+01, -5.21667737e+00],
## [ 1.25843153e+01, -2.56990523e+01, 3.71699672e+00],
## [-3.36988606e+01, -1.16904516e+00, -8.35743461e+00],
## [-2.25784403e+01, 4.35388300e+00, -1.42342905e+00],
## [ 2.19272120e+01, -1.08527688e+01, -2.88642763e+01],
## [-1.62304799e+01, 3.48799301e+00, 1.69587306e+00],
## [ 3.74095299e+01, 1.01690420e+01, -2.28144151e+00],
## [-1.00485019e+01, -2.54717647e+00, -1.60630025e+01],
## [-6.17727971e+01, 4.41498742e+00, 3.63348717e+00],
## [-8.06914249e+00, -1.32802582e+00, -9.48084493e+00],
## [-4.68427391e+01, 1.50230676e+00, -2.70237001e+00],
## [-4.71097786e+00, 1.29842415e+00, -4.50303933e+00],
## [-1.06254570e+00, -1.17513056e+01, -2.20589235e+00],
## [-3.75396573e+01, 5.86742463e+00, 1.55285313e+00],
## [-2.49552728e+01, 1.69342820e+00, 1.70093866e+00],
## [-6.07966811e+01, 3.82066077e+00, 2.92830545e+00],
## [ 2.66977950e+01, -3.20712892e+01, 5.88604136e+00],
## [ 2.96024123e+01, -3.34665032e+01, 3.55286879e+00],
## [ 2.22534178e+01, -2.85332234e+01, -2.47031899e-02],
## [-3.87958526e+01, -1.56816051e+00, 1.85559610e-01],
## [ 1.52114371e+01, -9.96356133e+00, 4.29271579e+00],
## [-5.33855471e+01, 5.26013046e+00, 3.45625855e+00],
## [ 1.54946884e+00, -1.79686163e+01, 4.35536832e+00],
## [-2.75558030e+01, -2.30019785e+00, -1.09158731e+01],
## [ 1.98043293e+01, -2.83225162e+01, 1.44078676e+00],
## [ 2.14632518e+01, -1.50348627e+01, 2.48015252e+00],
## [ 2.31045367e+01, -1.68003841e+01, -2.24252485e+01],
## [-3.25556929e+01, 5.11251630e+00, 3.46527499e+00],
## [ 2.65057012e+01, -7.35955266e+00, -4.64025176e+00],
## [ 3.04088130e+01, -3.85044229e-01, -5.55891889e+00],
## [-1.17765011e+01, 6.31568833e+00, -4.35981998e-02],
## [-7.17689848e+00, 8.47124588e+00, -5.51696048e+00],
## [-2.28652474e+01, 3.25094749e-01, -2.83940380e+00],
## [-2.09816139e+00, -8.17608473e+00, -1.04965051e+00],
## [ 3.60159410e+01, 2.77962371e+01, -1.69572430e+00],
## [ 9.48280522e+00, -9.65399066e+00, -1.78728077e+01],
## [-4.98196322e+01, 7.01202326e+00, 2.80934533e+00],
## [-7.97510359e+00, 1.63301560e+01, 2.16688430e-01],
## [ 3.84379409e+00, 4.91387494e+00, -4.22870761e-01],
## [-4.13232695e+01, 1.63396504e+00, 2.93395723e+00],
## [-2.41108316e+01, -1.09314246e+01, 2.47748051e+00],
## [-3.29936036e+01, 2.75311873e+00, 1.09744818e+00],
## [ 2.58486870e+01, -2.24548154e+01, 1.27776510e+00],
## [ 3.05327303e+01, -2.30673589e+01, -1.19679604e+00],
## [-4.04454780e+01, -6.87911284e-02, -5.28416467e+00]])
The archetypal analysis (method used to determine building blocks in Keren dataset) shows three main archetypes that correspond to different types of tissue :
Archetype 1 : Tumors with only cancer cells
Archetype 2 : CD4 and CD8 cells
Archetype 3 : Tumors mainly made by healthy tissue (they would be dissected afterwards)
Archetype 4 : Innate immunity component (macrophages and granulocytes mainly)
The fourth archetype is really close to Building block 4 which puts us on a track that tumors from Wagner dataset might be in part explained by the fourth building block.
Let’s do the dissection using the scores in PCA and the coordinates of archetypes in the 3 PC space.
# First, we compute the matrix B which gives the scores of cell types for each archetype
## We need the score the cell types in each PC (c1 dataframe in dudi.pca)
## Our input is a matrix of the coordinates of our archetypes in the PC space (3D space)
# B is the result of a tranSformation of archetypes coordinates in 3PCa to 10-dimension cellular space B = archetypes3PC (dot) Scores3PC
B0 <- compute_cells_prop_archetypes(archCoordw,eigenVectPCA = as.matrix(newPCAw$c1),meansPCA = newPCAw$cent)
cellAbundCorrected <- remove_healthy_archetype(ArchCellsProp = B0,
ArchWeights = archWPCA,
ArchToKeep = c("arch2", "arch3","arch4"),
ArchToRemove = "arch1")#eigenVectPCA = as.matrix(newPCAw$c1)
#checkArch <- scale(t(B0),center=newPCAw$cent,scale=FALSE)%*%as.matrix(newPCAw$c1)
Now, after having done the in silico dissection of samples from Wagner tumors, we can see if the cells proportions are comparable across the datasets with another plot per dataset :
CellsPropsk <- gather(as_tibble(Yk),key = "cellType",value = "proportion")%>%
mutate(dataset = "Keren")
CellsPropsw <- gather(as_tibble(cellAbundCorrected),key = "cellType",value = "proportion")%>%
mutate(dataset = "Wagner")
CellsPropswk <-bind_rows(CellsPropsk,CellsPropsw)
plot2 <-ggplot(data = CellsPropswk,
aes(x = dataset,y = proportion,color = dataset,group = dataset))+
geom_point()+
facet_wrap(~cellType)+
theme(axis.text.x = element_text(angle = 45,hjust = 0.8,vjust = 0.5))+
labs(title = "Boxplot of proportions of cell types from tumors from Keren & Wagner datasets")+
xlab ("") + ylab("proportion")
plot2
The cell types look quite comparable, It means that the purification of Wagner tumors yielded good results (with a little error rate ==> how can we compute this ?) Nonetheless, we observe higher proportions of the type Other Immune in Keren dataset.
newPCACoord <- scale(cellAbundCorrected,center = newPCAw$cent,scale = FALSE)%*%as.matrix(newPCAw$c1)
fig8 <- plot_ly(x = newPCACoord[,1],
y = newPCACoord[,2],
z = newPCACoord[,3],
type = "scatter3d",
mode = "markers",
name ="Wagner dissected tumors",
marker = list(color = "orange3",
symbol = "triangle",size = 6))
fig8 <- fig8 %>% layout(scene = list(xaxis = list(title = "PC1"),
yaxis = list(title = "PC2"),
zaxis = list(title = "PC3")),
title = "PCA on reference cell proportions of Wagner's BC tumors")
## Projection of the building blocks on the PCA space from previous dataset
PCs <- as.matrix(newPCAw$c1)
Ybb2 <- Y%>%filter(group=="keren_micro")%>%column_to_rownames(var="sample_id")%>%dplyr::select(-(group))
rownames(Ybb2) <- c("BB1","BB2","BB3","BB4")
Ybb_proj <- (scale(Ybb2,center = newPCAw$cent,scale = FALSE) %*% as.matrix(newPCAw$c1))
fig8 <- fig8 %>%add_trace(x = as.vector(Ybb_proj[,1]),
y = as.vector(Ybb_proj[,2]),
z = as.vector(Ybb_proj[,3]),
type = "scatter3d",
mode = "markers+text",
color=~rownames(Ybb2),
#text=~rownames(Ybb2),
showlegend = TRUE,
marker = list(symbol = "star-diamond",
color = c("#D463DF", "red","dodgerblue","black"),
size = 12),
inherit = FALSE)
fig8
rm(PCs)
Now, let’s do another PCA on Wagner cells space :
YwPure <- cellAbundCorrected
BCPCAwPure <- dudi.pca(YwPure, scale = FALSE, scannf = FALSE, nf = 3)
fviz_eig(BCPCAwPure)
fviz_pca_biplot(BCPCAwPure,col.var = 'indianred2',col.ind = 'steelblue2',invisible = 'ind', repel = TRUE)
fviz_contrib(BCPCAwPure,choice = "var")
#EpithelialCells2 <- as_tibble(Yw)$EpithelialCells
figPure <- plot_ly(x = BCPCAwPure$li$Axis1,
y = BCPCAwPure$li$Axis2,
type = "scatter",
mode = "markers",
name = "Wagner dissected tumors",
marker = list(color = "orange3",
symbol = "triangle",
size = 6))
figPure <- figPure %>% layout(xaxis = list(title = "PC1"),
yaxis = list(title = "PC2"),
title = "PCA on reference cell proportions of Wagner's BC tumors")
#PC1 <- as.matrix(BCPCAwPure$c1$CS1,ncol=1)
#PC2 <- as.matrix(BCPCAwPure$c1$CS2,ncol=1)
#Yk_proj1 <- (scale(Ybb,center=BCPCAwPure$cent,scale=FALSE) %*% PC1)
#Yk_proj2 <- (scale(Ybb,center=BCPCAwPure$cent,scale=FALSE)%*% PC2)
#BBprojPure <- cbind(Yk_proj1,Yk_proj2)
#BBprojPure<- as_tibble(BBprojPure) %>%dplyr::rename(dim1 = V1)%>%dplyr::rename(dim2=V2)
PC1 <- as.matrix(BCPCAwPure$c1$CS1,ncol=1)
PC2 <- as.matrix(BCPCAwPure$c1$CS2,ncol=1)
Ybb_proj <- (scale(Ybb,center=newPCAw$cent,scale=FALSE) %*% as.matrix(BCPCAwPure$c1,ncol=2))
# Projection of hte building lbocks on PC wagner tcell abundance space
Yk_proj1 <- (scale(Ybb2,center = newPCAw$cent,scale = FALSE) %*% PC1)
Yk_proj2 <- (scale(Ybb2,center = newPCAw$cent,scale = FALSE)%*% PC2)
figPure <- figPure%>%add_trace(x = as.vector(Ybb_proj[,1]),
y = as.vector(Ybb_proj[,2]),
type = "scatter",
mode = "markers",
color=~bblocks,
showlegend = TRUE,
marker = list(symbol = "star-diamond",
color = c("red", "orange","royalblue2","green"),
size = 12),
inherit = FALSE)
figPure
rm(PC1)
rm(PC2)
The biplot shows 3 main axes that would be the vertices of our 2D-simplex (a triangle):
Cancer cells component
Adaptive immunity component (Made in majority by CD4/CD8 T cells and some B cells)
Innate immunity component (Leukocytes including phagocytes such as monocytes and granulocytes)
Projecting the building blocks in this 2D PC space shows some linear relationships between the tumors cells-abundance (macro view) and sites cells-abundance (micro-view). A possible explanation can be that in the experiment conducted by Wagner et al., the samples are prepared as a cells suspension which discard every kind of local architecture. Thus, it is almost impossible to see the TLS structure in terms of only cell abundance. WHereas for Keren et al. experiment, mass cytometry was combined with MIBI (Multiplex Ion Beam imaging) with spatial data which allowed to identify some local structures with ecology-like sampling.
Also, the observed simplex shows the main functions of immunity that shape the immune response against proliferating cancer cells and thus shaping the fate of the tumor.
Now let’s project the proportions we have found on the PCA space from Keren dataset ==> this will allow us to see if cell abundance of tumors (macro architecture) is a linear combination of building blocks :
Building block 1 does not explain a lot of the distribution of cells proportions in Wagner tumors. Maybe, TLSs are too “mixed” with the inflammatory building blocks or there are some interactions between them. Another thing to point out is the numerical instability of some algorithms used for our analyses : the Archetypal analysis induced some inaccurate results probably due to floating points (a common issue in programming) which yields sometimes to probabilities higher than 1, sum of proportions lower than 100 an even negative cell proportions. We kept those results to see how analysis goes but that induces too much biases, so we have to find a solution to tweak this numerical instability that leads to have some tumors with -4 % B cells (which is spurious). We should investigate on a solution to get rid of those rounding errors
write.csv(as.matrix(newPCAw2$li), "./data/PCA_Wagner_4D.csv",row.names=FALSE)
write.csv(as.matrix(newPCAw2$c1), "./data/PCA_Wagner_4components.csv",row.names=FALSE)
write.csv(as.matrix(newPCAw2$cent), "./data/PCA_Wagner_4D_means.csv",row.names=FALSE)
import sys
sys.path.insert(1, '../TMENS_analysis/src/utils')
from archetypes import ArchetypalAnalysis
import pandas as pd
import numpy as np
# Reading the file of PCA from Wagner dataset
pcData = pd.read_csv("./data/PCA_Wagner_4D.csv")
pc3dWagner = np.array(pcData, dtype = "float64")
AA = ArchetypalAnalysis(n_archetypes = 5,
tolerance = 0.001,
max_iter = 200,
random_state = 2,
C = 0.0001,
initialize = 'random',
redundancy_try = 30)
AA.fit_transform(pc3dWagner)
## array([[-3.32799485e+00, -1.35826796e+01, 1.18491325e+00,
## 5.49802634e+00],
## [-4.17004891e+00, -1.84291589e+01, 2.22875701e+00,
## -1.81499856e+00],
## [-5.24253476e+01, 1.25250436e+00, 3.06054683e+00,
## 1.23037525e+00],
## [-2.50840182e+01, 6.49830518e+00, 1.97615455e+00,
## 4.67162595e-01],
## [ 4.76352780e+01, 3.97854367e+01, 2.64188151e+00,
## -8.39904547e-01],
## [ 2.93594723e+01, 2.43127456e+01, -2.79520815e+00,
## 6.45505812e+00],
## [ 2.62941501e+01, -7.62884784e+00, 8.31859943e+00,
## -3.14328545e+00],
## [-3.87678060e+01, 1.41873421e+01, 4.80942507e+00,
## 2.46375505e-01],
## [ 3.37126733e+01, 2.83993004e+01, 4.54936913e+00,
## -2.95372567e+00],
## [-5.47070200e+01, 3.55365029e+00, 4.39204282e+00,
## 3.41546303e-01],
## [ 2.30482006e+01, 1.29657687e+01, 2.04394392e+00,
## -1.77451010e+00],
## [-3.88291681e+01, 4.59210032e-02, 1.90312309e+00,
## 3.67093985e+00],
## [ 5.17411669e+01, 4.90429768e+01, 3.56181330e+00,
## -2.57064626e+00],
## [-2.73080077e+01, -9.71580475e+00, 4.53656209e+00,
## 5.07862910e-01],
## [-1.86047760e+01, 5.50911199e+00, 5.80297194e+00,
## -5.65007101e-01],
## [-1.83115483e+01, 1.56069471e+01, -2.17252555e-01,
## -1.00496547e-01],
## [-4.93664981e+01, 8.95446921e-01, 2.56110156e+00,
## 1.13235766e-01],
## [ 1.27121176e+01, -6.33481121e+00, -2.02314931e+01,
## -2.84731141e+00],
## [ 2.64424071e+01, -3.23760050e+01, 7.97822835e+00,
## -9.98341176e-01],
## [ 2.17157626e+01, 1.62116966e+01, 7.33571982e-01,
## 2.13169893e+00],
## [ 9.08819142e+00, -1.14665356e+01, -2.62882497e+01,
## 3.18051235e+00],
## [-3.08484107e+01, 3.93606089e+00, 2.35019096e+00,
## 2.79385799e+00],
## [-2.21493839e+01, -1.21051184e+01, 5.14327406e+00,
## 6.55803755e-01],
## [ 2.70470775e+01, 3.43810278e+01, 1.32101762e+00,
## 1.34509257e+00],
## [ 1.45996289e+01, -9.60837379e+00, 2.11952788e+00,
## 3.87991447e+00],
## [ 1.92891505e+01, 1.51207910e+01, -8.64178672e-01,
## 2.63651351e+00],
## [ 2.93731132e+01, 2.36203870e+01, 2.49556604e+00,
## 8.14046340e-01],
## [ 1.81723042e+01, 2.16704693e+01, 1.21652417e-01,
## 6.87605498e-01],
## [ 5.00085947e+01, 4.45744079e+01, 2.50072613e+00,
## -1.56448373e+00],
## [ 3.79532925e+01, 2.67296657e+00, 4.70201159e+00,
## -2.07520353e-01],
## [ 3.25672953e+01, -5.76911543e-01, 5.49683021e+00,
## -7.30278005e-01],
## [ 7.22267083e+00, -7.22514639e+00, 1.06335698e+00,
## -2.92224362e-01],
## [ 2.17680103e+00, -2.08679954e+01, 2.65003974e+00,
## -3.46505318e+00],
## [-1.91842616e+01, 7.82178063e+00, 5.02572862e+00,
## 7.69241162e-02],
## [ 2.85848430e+01, -8.24296507e+00, 5.51278183e+00,
## -2.15017373e-01],
## [ 3.29020187e+01, -5.44170959e+00, -2.05287662e+00,
## 7.73408894e+00],
## [ 5.99996804e+00, 2.37045347e+01, 9.66464980e-01,
## -1.98932886e-01],
## [-1.94972469e+00, -1.68829177e+01, 2.67274254e+00,
## -3.91359595e-01],
## [-1.36532265e+01, 2.06423183e+01, 4.53100043e+00,
## -7.89791454e-01],
## [ 6.32017564e+00, 3.16722018e+01, 4.17414137e+00,
## -1.15085418e+00],
## [ 2.45890872e+01, 2.29848705e+01, 3.56969960e+00,
## -3.17546154e+00],
## [ 3.33921652e+01, 3.83255543e+01, 3.16184820e+00,
## -2.27681457e+00],
## [ 2.93504624e+01, -2.88724206e+01, 7.21411886e+00,
## -6.77135982e+00],
## [ 3.83146091e+01, 4.19122895e+01, 3.18634016e+00,
## -2.49450603e+00],
## [ 2.67052439e+01, 2.91442242e+01, -5.02697009e+00,
## -5.44114182e+00],
## [-5.85510033e+01, 4.06219573e+00, 3.33336113e+00,
## 8.16949601e-01],
## [-1.96198740e+00, 1.62572272e+01, 1.32400101e+00,
## 1.39921531e+00],
## [ 1.13752941e+00, -2.08334218e+01, 3.09584257e+00,
## 1.91488692e+00],
## [ 2.17293448e+01, -1.16608260e+01, -9.64043393e+00,
## -1.69307072e+00],
## [ 3.60085913e+01, 1.13991200e+01, 2.48410842e+00,
## 1.58524035e+00],
## [ 3.92231383e+01, 2.79454085e+01, 2.72260139e+00,
## -3.66588682e+00],
## [ 1.62287108e+00, 1.96920281e+01, -9.01814984e-01,
## -3.47038902e+00],
## [ 7.27307741e-01, 1.64091923e+01, -1.36980196e+00,
## 2.79281106e+00],
## [-6.36558119e+01, 4.91052962e+00, 5.11748495e+00,
## 1.03649350e+00],
## [ 1.02454375e+01, -1.31717103e+00, 6.67406800e-01,
## -5.91168021e-01],
## [-7.09257493e+00, 2.05230045e+01, 4.08137606e-01,
## -1.01022363e+00],
## [-1.84122889e+01, 4.88651087e+00, 3.83362421e+00,
## 1.28569433e+00],
## [-4.42540674e+01, 6.34321138e+00, 3.26777537e+00,
## 1.04171150e-01],
## [ 2.06839300e+01, 2.13276566e+01, 2.93897270e+00,
## 6.60557431e-01],
## [-7.50373839e-01, -1.06250209e+01, 3.16972480e+00,
## -4.66203870e+00],
## [-7.05155496e+00, -9.64270809e+00, -1.85081601e+01,
## -1.09251621e+01],
## [ 1.81678475e+01, 2.56243907e+01, -7.88861457e-01,
## -2.12296594e-01],
## [-1.78083485e+00, 5.32230110e+00, 2.47546446e+00,
## 2.72077390e+00],
## [ 3.20601083e+01, -3.71893517e+01, 1.23126563e+01,
## -6.06429941e+00],
## [ 3.37758281e-01, 1.88274203e+01, 6.82632353e-01,
## 3.13708384e+00],
## [-2.12212486e+01, -4.61060207e+00, 1.55180685e+00,
## 2.90915161e+00],
## [ 3.73409112e+01, 3.60239924e+00, -6.22528420e+00,
## 1.14898244e+01],
## [-2.14396133e+01, 1.03585299e+01, 1.70292902e+00,
## -1.19982533e+00],
## [ 3.82498686e+01, -8.45694753e+00, 7.40789687e+00,
## -2.32205389e+00],
## [ 3.44159207e+01, 1.68772075e+01, 3.66843915e+00,
## -7.66888904e-02],
## [-3.21806049e+01, -4.50452159e+00, 8.00676563e-01,
## -2.99214716e+00],
## [-3.44693905e+01, -1.69628400e+00, 1.08300159e+00,
## 5.31953090e+00],
## [ 2.82512218e+01, 3.18571590e+01, 2.10085533e+00,
## -3.09030146e+00],
## [-1.31270657e+01, 1.30959838e+01, 2.67075700e+00,
## -2.15188504e-01],
## [ 3.27299245e+01, -3.07434640e+01, 7.72958276e+00,
## -9.63030441e-01],
## [ 3.68490458e+00, -1.89484216e+01, 6.02814617e+00,
## 5.47244382e-01],
## [ 1.24542610e+01, -1.56021774e+01, -3.64971428e+00,
## 5.23346532e-01],
## [ 2.61553452e+01, -3.20623263e+01, 6.17562984e+00,
## 9.23132899e-01],
## [ 2.48065457e+01, -3.41663812e+01, 1.22186876e+01,
## -5.40054471e+00],
## [-1.55784494e+01, 5.68643620e+00, -1.35589249e+00,
## -1.09522457e+00],
## [ 3.52601110e+00, -1.71424415e+01, 7.68868952e+00,
## -1.49967252e+00],
## [ 2.37145809e+00, -8.04898944e+00, 2.58931558e+00,
## -4.03493654e+00],
## [-1.52441218e+01, -8.80452180e+00, -2.33840794e+00,
## -2.95621142e+00],
## [-8.29415052e+00, -1.07402951e+01, -1.31333302e+01,
## 5.46987534e+00],
## [ 1.01952020e+01, -2.02129925e+01, 9.33534617e+00,
## -4.22561301e+00],
## [ 2.42302723e+01, -2.03400073e+01, 2.98386044e+00,
## -3.40701093e+00],
## [ 3.28564944e+01, -3.59612122e+01, 1.26645412e+01,
## -5.89760960e+00],
## [-7.89678661e+00, -1.21539185e+01, -1.01422729e+01,
## -4.95711309e+00],
## [-5.25310014e+01, 2.24467776e+00, 3.04254779e+00,
## 7.57151929e-01],
## [ 1.22799711e+01, -1.06825052e+01, 1.73402501e+00,
## -1.19014576e+00],
## [-2.64607908e+01, 5.35729729e+00, 2.72919393e-01,
## -1.36035579e-01],
## [ 1.39209429e+01, -2.84503831e+01, 9.10687469e+00,
## -2.26666415e+00],
## [-7.47200331e+00, -7.18220362e+00, -2.46734308e+01,
## -1.26016760e+01],
## [-1.49812345e+00, -1.12612724e+01, 1.40137557e+00,
## 1.69169283e+00],
## [-3.67000264e+01, -7.29204996e-01, 3.87804845e+00,
## -8.66009465e-01],
## [-5.66781469e+01, 5.11353267e+00, 4.65012110e+00,
## 1.03810658e+00],
## [ 3.64898580e+00, -1.11896671e+01, -2.05834947e+00,
## 1.14952710e+00],
## [ 2.64528848e+01, -1.73152490e+01, -6.20844982e+00,
## 3.68397212e+00],
## [ 1.01705586e+00, 1.74027571e+01, -5.87812154e+00,
## -4.13139036e+00],
## [ 1.25540508e+01, -2.56965857e+01, 3.74015343e+00,
## 1.75371689e+00],
## [-3.33447264e+01, -1.61349338e+00, -1.09547062e+01,
## -6.32129691e+00],
## [-2.25225845e+01, 4.37354436e+00, -1.39736478e+00,
## -1.72525296e+00],
## [ 1.70854832e+01, -9.19287668e+00, -2.17919453e+01,
## 8.39643695e+00],
## [-1.62472656e+01, 3.50325111e+00, 1.72355162e+00,
## 1.55624780e+00],
## [ 3.73867191e+01, 1.01773529e+01, -2.27725993e+00,
## 2.92175305e+00],
## [-1.02402624e+01, -1.36440107e+00, -1.76578334e+01,
## -7.42247161e+00],
## [-6.20749585e+01, 5.12973667e+00, 4.88862351e+00,
## 9.84142884e-01],
## [-7.96569556e+00, -1.30229794e+00, -9.46754703e+00,
## -1.09965346e+00],
## [-4.68478029e+01, 1.82779253e+00, -3.33227315e+00,
## -2.85422697e+00],
## [-4.71265682e+00, 1.31898599e+00, -4.48536702e+00,
## 3.41914643e+00],
## [-1.04490722e+00, -1.17367113e+01, -2.18586813e+00,
## 1.27606968e+00],
## [-3.75522426e+01, 5.88836126e+00, 1.58755661e+00,
## 1.60402802e+00],
## [-2.49765577e+01, 1.71077642e+00, 1.73194618e+00,
## 2.02763151e+00],
## [-6.03080815e+01, 4.18997723e+00, 3.34237393e+00,
## 2.23853892e-01],
## [ 2.66563369e+01, -3.20994843e+01, 5.25443845e+00,
## 1.90748315e+00],
## [ 3.01581792e+01, -3.11178062e+01, -7.00663286e-01,
## 8.17812876e+00],
## [ 2.23738564e+01, -2.83750601e+01, -2.32069196e-01,
## -2.10444259e+00],
## [-3.88008258e+01, -1.54592983e+00, 2.20183528e-01,
## 1.95165858e+00],
## [ 1.52273870e+01, -9.96199276e+00, 4.31319305e+00,
## -2.03308363e+00],
## [-5.33966677e+01, 5.28223974e+00, 3.49845674e+00,
## 8.37295430e-01],
## [ 1.57280494e+00, -1.79647285e+01, 4.38159876e+00,
## -2.16254407e+00],
## [-2.82929410e+01, -2.87236003e+00, -1.30552450e+01,
## -5.73765159e+00],
## [ 2.00256037e+01, -2.80939224e+01, 1.19739186e+00,
## -7.47512967e+00],
## [ 2.13986390e+01, -1.50615395e+01, 2.04366478e+00,
## 4.23016360e+00],
## [ 2.59463458e+01, -1.95237142e+01, -2.50444648e+01,
## 3.36037195e+01],
## [-3.25814715e+01, 5.12973130e+00, 3.50031629e+00,
## 1.50763298e+00],
## [ 2.65289486e+01, -7.34807895e+00, -4.63288309e+00,
## 1.56073675e+00],
## [ 2.92707729e+01, -8.67083751e-03, -4.39858235e+00,
## -3.13273953e+00],
## [-1.17919724e+01, 6.33255640e+00, -1.95417984e-02,
## 2.17402119e+00],
## [-7.12269500e+00, 8.09311452e+00, -4.94564746e+00,
## 9.84610550e+00],
## [-2.27766058e+01, 3.45649353e-01, -2.81445266e+00,
## -3.13477470e+00],
## [-2.11097374e+00, -8.16178751e+00, -1.02827062e+00,
## 2.68527384e+00],
## [ 3.60199494e+01, 2.78052871e+01, -1.69284336e+00,
## 4.42455860e-01],
## [ 9.57564181e+00, -9.62141214e+00, -1.78728091e+01,
## 3.60031814e+00],
## [-4.97999087e+01, 6.91827880e+00, 3.00916309e+00,
## 1.81868307e+00],
## [-7.95937755e+00, 1.63460290e+01, 2.38170267e-01,
## -3.35897822e-01],
## [ 3.80677379e+00, 4.92772806e+00, -4.04305739e-01,
## 3.60047223e+00],
## [-4.13271345e+01, 1.65325158e+00, 2.97176994e+00,
## 5.11489433e-01],
## [-2.39315912e+01, -1.03951079e+01, 2.68445277e+00,
## 2.58531240e+00],
## [-3.30343804e+01, 2.77389062e+00, 1.13074577e+00,
## 3.71745961e+00],
## [ 2.58502451e+01, -2.24526146e+01, 1.29317993e+00,
## 5.26779182e-01],
## [ 2.86640076e+01, -2.22425826e+01, 4.42054391e+00,
## -6.85987771e+00],
## [-4.05451530e+01, -5.35767426e-01, -5.71826098e+00,
## -2.09663062e+00]])
archetypes_wagner = AA.alfa
pd.DataFrame(AA.archetypes).to_csv("./outputs/Coordinates_Archetypes_Wagner_4PCA.csv")
pd.DataFrame(archetypes_wagner).to_csv("./outputs/Archetypes_Wagner_4PCA.csv")
#Get the probabilities to belong to the 4 archetypes for each tumor in Wagner dataset (4 x 143)
archWPCA2 <- as_tibble(read.csv("./outputs/Archetypes_Wagner_4PCA.csv",header=TRUE))
archWPCA2 <- archWPCA2%>%column_to_rownames(var = "X")
rownames(archWPCA2) <- c("arch1", "arch2","arch3","arch4","arch5")
## Checking that the sum is equal to 1 : probability to belong to one of the archetypes
#colSums(archWPCA)
colnames(archWPCA2) <- rownames(CellsPropBC2)
archCoordw2 <- as.matrix(read.csv("./outputs/Coordinates_Archetypes_Wagner_4PCA.csv", header = TRUE))
archCoordw2 <- archCoordw2[,-1]
colnames(archCoordw2) <- c("arch1", "arch2","arch3","arch4","arch5")
## Plot the archetypes in Wagner PCA space
figNew4 <- plot_ly(x = newPCAw2$li$Axis1,
y = newPCAw2$li$Axis2,
z = newPCAw2$li$Axis3,
showlegend = TRUE,
type = "scatter3d",
mode = "markers",
marker = list(color = "dodgerblue",symbol = "triangle",size = 6),
opacity = 0.5,
mode = "text",
name = "tumors")
figNew4 <- figNew4 %>% add_trace(x = archCoordw2[1,],
y = archCoordw2[2,],
z = archCoordw2[3,],
type = "scatter3d",mode = "markers+text",
text = colnames(archCoordw2),#c("arch. 1", "arch. 2","arch. 3","arch. 4"),
textposition = c('top right','top right','top left','top right',"top right"),
textfont = list(color = '#000000', size = 16),
showlegend = TRUE,
name = "Archetypes",
marker = list(color = c("#34A6E8","#48BF3B",
"#F54505","#E8C434","#5F147A"),
symbol = "star-diamond",size = 14),
inherit = FALSE)
figNew4 <- figNew4 %>% layout(scene = list(xaxis = list(title = "PC 1"), yaxis = list(title = "PC 2"), zaxis = list(title = "PC 3") ),
title = "PCA on reference cell proportions space of BC tumors")
figNew4
ArchCellsProp2 <- t(archCoordw2)%*% t(as.matrix(newPCAw2$c1))#t(scale(t(as.matrix(newPCAw$c1)),center=-(newPCAw$cent),scale=FALSE)) %*% archCoordw
## Decentering the matrix to obtain percentages of cell types ==> the sum for each archetype must be =0
ArchCellsProp0.2 <- apply(t(ArchCellsProp2), 2, function(x) x+newPCAw2$cent)
ArchCellsProp0.2 <- as_tibble(ArchCellsProp0.2,rownames = NA) %>%
rownames_to_column(var = "cell_type") %>%
pivot_longer(cols = c(arch1,arch2,arch3,arch4,arch5),
names_to = "archetype",
values_to = "percentage")
ggplot(data = ArchCellsProp0.2,
aes(x = cell_type,y = percentage,group = archetype,fill = archetype))+
geom_bar(stat = "identity",position = position_dodge(),width = 0.6)+
scale_fill_manual("Archetype",values = c("arch1" = "#34A6E8",
"arch2" = "#48BF3B",
"arch3" = "#F54505",
"arch4" = "#E8C434",
"arch5" = "#5F147A"))+
theme(axis.text.x = element_text(angle = 80, vjust = .5))+
xlab ("") + ylab("% tumor cellular composition")
B.5d <- compute_cells_prop_archetypes(archCoordw2,eigenVectPCA = as.matrix(newPCAw2$c1),meansPCA = newPCAw2$cent)
Cells4A.dissected <- remove_healthy_archetype(ArchCellsProp = B.5d,
ArchWeights = archWPCA2,
ArchToKeep = c("arch1", "arch2","arch4","arch5"),
ArchToRemove = "arch3")
Cells4A.dissected2 <- as_tibble(Cells4A.dissected,rownames = NA)%>%
pivot_longer(cols = c(EndothelialCells,EpithelialCells,
`Mesenchymal-like`,NK,B,DC,`CD4-T`,
`CD8-T`,Macrophages,OtherImmune),
names_to = "cell_type",
values_to = "proportion")
ggplot(data = Cells4A.dissected2,aes(x = cell_type, y = proportion,fill = cell_type))+
geom_boxplot()+ #stat="identity",position = position_dodge(),width = 0.6
theme(axis.text.x = element_text(angle = 90, vjust = .3,hjust = 0.1))+
xlab ("") + ylab("% tumors cellular composition")
NewCells4Arch.PCA <- dudi.pca(Cells4A.dissected,scale=FALSE,center=TRUE,
scannf=FALSE,nf=3)
fviz_eig(NewCells4Arch.PCA)
figNew5 <- plot_ly(x = NewCells4Arch.PCA$li$Axis1,
y = NewCells4Arch.PCA$li$Axis2,
z = NewCells4Arch.PCA$li$Axis3,
# color= ~StatusTumor2$Status,#as.vector(AgeResection2$`Age at Surgery`),#MesenchymalLike,
showlegend = TRUE,
type = "scatter3d",
mode = "markers",
marker = list(color = "dodgerblue",symbol = "triangle",size = 6),
opacity = 0.5,
mode = "text",
name = "tumors")
## Projection of the building blocks on the PCA space from previous dataset
PCs <- as.matrix(NewCells4Arch.PCA$c1)
Ybb_proj <- (scale(Ybb,center = NewCells4Arch.PCA$cent,scale = FALSE) %*% as.matrix(NewCells4Arch.PCA$c1))
figNew5 <- figNew5 %>% add_trace(x = as.vector(Ybb_proj[,1]),
y = as.vector(Ybb_proj[,2]),
z = as.vector(Ybb_proj[,3]),
type = "scatter3d",
mode = "markers",
color=~bblocks,
showlegend = TRUE,
marker = list(symbol = "star-diamond",
color = c("#D463DF", "red","dodgerblue","black"),
size = 12),
inherit = FALSE)
figNew5 <- figNew5 %>% layout(scene = list(xaxis = list(title = "PC 1"),
yaxis = list(title = "PC 2"),
zaxis = list(title = "PC 3") ),
title = "PCA on reference cell proportions space of BC tumors")
figNew5
rm(PCs)
We will perform a PCA on both of the datasets from Keren and Wagner paper. Here, we are going to find if the two datasets share the same archetypes. And if the archetypes are likely to be Building blocks, this would mean that they can explain the cellular composition of breast cancer tumors.
Bridging macro and micro architecture of BC tumors shows that the building blocks can possibly linearly explain macro architecture of BC tumors
Building block 1 (TLSs) is not a visible archetype from a macro view of tumors in cell abundance space ==> correlates with inflammatory block
Rounding errors might induce spurious and biologically wrong results
Different datasets are difficult to compare due to different markers/techniques used
The next step consists in doing a multi-linear regression in order to see if we can predict cell composition of tumors by a weighted sum (linear combination) of building blocks observed from a microscopic perspective
#WagnerKerenData2 <- list("MacroTumorsW"= YwPure,"BBs" = Ybb,"KerenTumors"= Yk,"WagnerPCA" = BCPCAwPure)#,"KerenbbPCA" = newPCAk)#,"DensityBBs" = rowSums(DataKeren),"MetadataWagner" = AgeResection2)
WagnerKerenData <- list ("CellAbundanceBC" = Y, # Dataframe containing cellular abundance (in %) from keren, wagner article and building blocks.
"WagnerPCA" = scBCPCA.pure) # PCA of the dissected tumors in the new reference space (cell types mapped across datasets)
saveRDS(WagnerKerenData,"./outputs/MacroMicroData.rds")